import pandas as pd
import os
import geopandas as gpd
import matplotlib.pyplot as plt
import contextily as ctx
import pyodbc
import pymssql
import sqlalchemy
import holoviews as hv
from shapely import wkt
import psrcelmerpy
import ogr
import json
from bokeh.io import show
from bokeh.models import (CDSView, ColorBar, ColumnDataSource,
CustomJS, CustomJSFilter,
GeoJSONDataSource, HoverTool,
LinearColorMapper, Slider)
from bokeh.layouts import column, row, widgetbox
from bokeh.palettes import brewer
from bokeh.plotting import figure
from bokeh.tile_providers import get_provider, Vendors
from bokeh.plotting import figure, output_file, show, output_notebook
import plotly.graph_objects as go
import plotly.express as pex
from time import gmtime, strftime
Inrix provided us data for May 2015 and May 2019 freight movements. The data includes the following:
Trips Origin-Destination Information - includes information about trips such as trips start/end location, start/end time, origin/destination zone, trip length, etc.
Trajectory Information - contains trajectories for all of the trips listed in the trip table.
#load 2019 data
# Raw INRIX deliveries live on the AWS-Model10 network share.
data_path = r'\\AWS-Model10\Model Data 2\INRIX'
os.chdir(data_path)
# The schema CSV supplies the column names; the gzipped trips file is read
# with names= (i.e. it carries no header row of its own).
trips_header = pd.read_csv(os.path.join(r'\\AWS-Model10\Model Data 2\INRIX\trip_paths_usa_wa_puget_sound_201905\date=2020-12-29\reportId=45749\v1', 'schema', 'TripBulkReportTripsHeaders.csv'))
trips_df = pd.read_csv(os.path.join(r'\\AWS-Model10\Model Data 2\INRIX\trip_paths_usa_wa_puget_sound_201905\date=2020-12-29\reportId=45749\v1\data\trips\trips.csv.gz'),
compression='gzip', names = trips_header.columns)
2019 data
There are about 74,500 unique device IDs - meaning that the INRIX data captures 74,500 trucks making 1.2 million trips in the PSRC region.
On average, each truck completed about 16 trips in May, although the median is 3 trips. Such a difference between the average and the median is due to outliers in the data - some trucks made more than 600 trips during May 2019, while 75% of all of the trucks made up to 7 trips.
# Trip counts per device and per provider in May 2019, ascending by count.
devices = (trips_df.groupby('DeviceId')['DeviceId']
                   .count()
                   .reset_index(name='count')
                   .sort_values(by=['count']))
#devices.describe()
providers = (trips_df.groupby('ProviderId')['ProviderId']
                     .count()
                     .reset_index(name='count')
                     .sort_values(by=['count']))
#providers.describe()
#devices.tail(30)
# Spot-check all trips made by one specific device.
device_test = trips_df.loc[trips_df["DeviceId"] == '15bfd6b38a6b00cd53b5f0de44368dc9']
#device_test
# Load the May 2015 delivery the same way as 2019: column names come from the
# schema header file, trip records from the gzipped export (no header row).
trips_header_2015 = pd.read_csv(os.path.join(r'\\AWS-Model10\Model Data 2\INRIX\trip_paths_usa_wa_puget_sound_201505\date=2020-12-29\reportId=45748\v1', 'schema', 'TripBulkReportTripsHeaders.csv'))
trips_df_2015 = pd.read_csv(os.path.join(r'\\AWS-Model10\Model Data 2\INRIX\trip_paths_usa_wa_puget_sound_201505\date=2020-12-29\reportId=45748\v1\data\trips\trips.csv.gz'),
compression='gzip', names = trips_header_2015.columns)
2015 data
There are about 87,000 unique device IDs.
Similar to the May 2019 data, the average number of trips per truck is greater than the median, which is indicative of outliers in the data. This means that some trucks make many more trips compared to the majority of trucks. On average, each truck completed about 13 trips in May 2015, although the median is 4 trips. 75% of all of the trucks completed up to 8 trips.
The maximum number of trips recorded for one device ID is 549 trips.
# Trip counts per device and per provider in May 2015, ascending by count.
devices_2015 = (trips_df_2015.groupby('DeviceId')['DeviceId']
                             .count()
                             .reset_index(name='count')
                             .sort_values(by=['count']))
#devices_2015.describe()
providers_2015 = (trips_df_2015.groupby('ProviderId')['ProviderId']
                               .count()
                               .reset_index(name='count')
                               .sort_values(by=['count']))
#providers_2015.describe()
In total, there were 1,242,924 trips recorded in 2019 data and 1,179,583 trips in 2015 data
#number of trips recorded in 2019
#len(pd.unique(trips_df['TripId']))
#number of trips recorded in 2015
#len(pd.unique(trips_df_2015['TripId']))
Vehicle types in the dataset
Inrix data includes medium trucks/vans (ranges from 14001–26000 lb) and heavy trucks (> 26000 lb) in both 2015 and 2019.
In 2019, there were 1,013,601 trips made by medium trucks and 229,334 trips by heavy trucks.
In 2015, there were 841,373 trips made by medium trucks and 338,223 trips by heavy trucks.
#trips by vehicle class in 2019 (VehicleWeightClass 2 = medium, 3 = heavy)
#trips_df.groupby('VehicleWeightClass').size()
#trips_df.groupby('VehicleWeightClass').DeviceId.nunique()
#trips by vehicle class in 2015
#trips_df_2015.groupby('VehicleWeightClass').size()
#trips_df_2015.groupby('VehicleWeightClass').DeviceId.nunique()
Dates covered
Data covers the following dates in 2019:
May 1st - 31st 2019
Number of trips during an average weekday ~47-55k
Number of trips during an average weekend ~9-18k
in 2015:
May 1st - 31st 2015
Number of trips during an average weekday ~41-69k
Number of trips during an average weekend ~11-24k
# Parse trip timestamps and derive date (ddmmYYYY), hour-of-day, and
# day-of-week columns used by all downstream aggregations.
# Fix: the 2019 StartDate was converted to Pacific time but EndDate and the
# 2015 StartDate were left unconverted, so end-of-trip hours and 2015 hours
# were in a different timezone than 2019 start hours. Convert all of them
# consistently. Also use the vectorized .dt.strftime instead of row-wise apply.
# NOTE(review): assumes the raw Start/End date strings parse tz-aware, as the
# original tz_convert on StartDate implies — confirm against the INRIX schema.
trips_df['StartDate_upd'] = (pd.to_datetime(trips_df['StartDate'])
                             .dt.tz_convert('America/Los_Angeles'))
trips_df["StartDate_dmy"] = trips_df["StartDate_upd"].dt.strftime('%d%m%Y')
trips_df["StartTime_h"] = trips_df["StartDate_upd"].dt.strftime('%H')
trips_df['EndDate_upd'] = (pd.to_datetime(trips_df["EndDate"])
                           .dt.tz_convert('America/Los_Angeles'))
trips_df["EndDate_dmy"] = trips_df["EndDate_upd"].dt.strftime('%d%m%Y')
trips_df["EndTime_h"] = trips_df["EndDate_upd"].dt.strftime('%H')
trips_df_2015['StartDate_upd'] = (pd.to_datetime(trips_df_2015["StartDate"])
                                  .dt.tz_convert('America/Los_Angeles'))
trips_df_2015["StartDate_dmy"] = trips_df_2015["StartDate_upd"].dt.strftime('%d%m%Y')
trips_df_2015["StartTime_h"] = trips_df_2015["StartDate_upd"].dt.strftime('%H')
trips_df["StartDOW"] = trips_df["StartDate_upd"].dt.strftime('%A')
trips_df["EndDOW"] = trips_df["EndDate_upd"].dt.strftime('%A')
trips_df_2015["StartDOW"] = trips_df_2015["StartDate_upd"].dt.strftime('%A')
#weekday trips only
weekdays = ["Monday", "Tuesday","Wednesday","Thursday","Friday"]
# Trips that START on a weekday vs. trips that END on a weekday — a trip can
# cross midnight, so the two subsets are not identical.
trips_df_weekdays = trips_df[trips_df.StartDOW.isin(weekdays)]
end_trips_df_weekdays = trips_df[trips_df.EndDOW.isin(weekdays)]
trips_df_2015_weekdays = trips_df_2015[trips_df_2015.StartDOW.isin(weekdays)]
#separating heavy and medium trucks
# VehicleWeightClass: 2 = medium (14,001-26,000 lb), 3 = heavy (> 26,000 lb).
# NOTE(review): the 2019 splits use the weekday subset but the 2015 splits
# below use ALL days — confirm this asymmetry is intended.
trips_df_heavy = trips_df_weekdays[trips_df_weekdays['VehicleWeightClass']==3]
trips_df_medium = trips_df_weekdays[trips_df_weekdays['VehicleWeightClass']==2]
trips_df_heavy_2015 = trips_df_2015[trips_df_2015['VehicleWeightClass']==3]
trips_df_medium_2015 = trips_df_2015[trips_df_2015['VehicleWeightClass']==2]
end_trips_df_heavy = end_trips_df_weekdays[end_trips_df_weekdays['VehicleWeightClass']==3]
end_trips_df_medium = end_trips_df_weekdays[end_trips_df_weekdays['VehicleWeightClass']==2]
Trips by type of truck and time of day during a weekday
# Medium-truck trips: count per (day-of-week, hour, calendar date), then
# average the per-date counts so each weekday/hour gets a mean daily volume.
m_trips_hour = trips_df_medium.groupby(['StartDOW','StartTime_h',"StartDate_dmy"]).size().reset_index(name='count')
# Selecting ['count'] averages only the numeric column (fixes the pandas
# numeric_only FutureWarning this cell emitted).
m_trips_hour_agg = m_trips_hour.groupby(['StartDOW','StartTime_h'])['count'].mean().reset_index()
# Fix: group trip ENDS by the END date — the original grouped by
# StartDate_dmy here, inconsistent with the heavy-truck cell which uses
# EndDate_dmy for the end-of-trip aggregation.
end_m_trips_hour = end_trips_df_medium.groupby(['EndDOW','EndTime_h',"EndDate_dmy"]).size().reset_index(name='count')
end_m_trips_hour_agg = end_m_trips_hour.groupby(['EndDOW','EndTime_h'])['count'].mean().reset_index()
C:\Users\SChildress\AppData\Local\Temp\ipykernel_20000\2398152822.py:2: FutureWarning: The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function. m_trips_hour_agg = m_trips_hour.groupby(['StartDOW','StartTime_h']).mean().reset_index() C:\Users\SChildress\AppData\Local\Temp\ipykernel_20000\2398152822.py:5: FutureWarning: The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function. end_m_trips_hour_agg = end_m_trips_hour.groupby(['EndDOW','EndTime_h']).mean().reset_index()
Medium Trucks Volume by day of week and hour of the day
In May 2019, medium truck volumes are higher Tuesday, Wednesday, and Thursday. Medium truck volumes are lower during Monday and Friday. The reason that an average Mondays' volumes are lower during May 2019 is due to Memorial Day.
On average, all weekdays have two pronounced AM and noon peaks: one at 7 AM and another one at 11 AM-12 PM.
output_notebook()
# One line per weekday, average medium-truck volume by start hour.
p = figure(title="Medium Trucks Volume by Hour", x_axis_label='Time', y_axis_label='Number of Trips')
# Monday keeps bokeh's default line color; the rest use an explicit palette.
day_colors = [("Monday", None), ("Tuesday", '#d95f02'), ("Wednesday", '#7570b3'),
              ("Thursday", '#e7298a'), ("Friday", '#1b9e77')]
for day, color in day_colors:
    extra = {} if color is None else {"line_color": color}
    p.line(x='StartTime_h', y='count',
           source=m_trips_hour_agg[m_trips_hour_agg['StartDOW'] == day],
           legend_label=day, line_width=2, **extra)
hover = HoverTool()
hover.tooltips = [
    ('Average Trips', '@count'),
    ('Time', '@StartTime_h'),
    ('Day of Week', '@StartDOW'),
]
p.add_tools(hover)
p.legend.location = "top_left"
show(p)
# Heavy-truck trips: count per (day-of-week, hour, calendar date), then
# average the per-date counts for a mean daily volume by weekday/hour.
h_trips_hour = trips_df_heavy.groupby(['StartDOW','StartTime_h',"StartDate_dmy"]).size().reset_index(name='count')
# Selecting ['count'] averages only the numeric column (fixes the pandas
# numeric_only FutureWarning this cell emitted).
h_trips_hour_agg = h_trips_hour.groupby(['StartDOW','StartTime_h'])['count'].mean().reset_index()
end_h_trips_hour = end_trips_df_heavy.groupby(['EndDOW','EndTime_h',"EndDate_dmy"]).size().reset_index(name='count')
end_h_trips_hour_agg = end_h_trips_hour.groupby(['EndDOW','EndTime_h'])['count'].mean().reset_index()
C:\Users\SChildress\AppData\Local\Temp\ipykernel_20000\4144757500.py:2: FutureWarning: The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function. h_trips_hour_agg = h_trips_hour.groupby(['StartDOW','StartTime_h']).mean().reset_index() C:\Users\SChildress\AppData\Local\Temp\ipykernel_20000\4144757500.py:5: FutureWarning: The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function. end_h_trips_hour_agg = end_h_trips_hour.groupby(['EndDOW','EndTime_h']).mean().reset_index()
Heavy Trucks Volume by day of week and hour of the day
In May 2019, heavy truck volumes are the highest during Tuesday, Wednesday, and Thursday. Heavy truck volumes are slightly lower during Monday and Friday. Similar to the medium truck volumes, Mondays' volumes might be lower due to Memorial Day.
Similar to the medium trucks volume, heavy trucks volume has two pronounced peaks: one at 10 AM (which is more spread out) and another one at 4 PM.
output_notebook()
# One line per weekday, average heavy-truck volume by start hour.
p = figure(title="Heavy Trucks Volume by Hour", x_axis_label='Time', y_axis_label='Number of Trips')
# Monday keeps bokeh's default line color; the rest use an explicit palette.
day_colors = [("Monday", None), ("Tuesday", '#d95f02'), ("Wednesday", '#7570b3'),
              ("Thursday", '#e7298a'), ("Friday", '#1b9e77')]
for day, color in day_colors:
    extra = {} if color is None else {"line_color": color}
    p.line(x='StartTime_h', y='count',
           source=h_trips_hour_agg[h_trips_hour_agg['StartDOW'] == day],
           legend_label=day, line_width=2, **extra)
hover = HoverTool()
hover.tooltips = [
    ('Average Trips', '@count'),
    ('Time', '@StartTime_h'),
    ('Day of Week', '@StartDOW'),
]
p.add_tools(hover)
p.legend.location = "top_left"
show(p)
# (Recomputes the 2019 heavy-truck start-hour averages — duplicates the cell
# above, kept for notebook flow — and adds the 2015 equivalent.)
h_trips_hour = trips_df_heavy.groupby(['StartDOW','StartTime_h',"StartDate_dmy"]).size().reset_index(name='count')
# Selecting ['count'] averages only the numeric column (fixes the pandas
# numeric_only FutureWarning these cells emitted).
h_trips_hour_agg = h_trips_hour.groupby(['StartDOW','StartTime_h'])['count'].mean().reset_index()
h_trips_hour_2015 = trips_df_heavy_2015.groupby(['StartDOW','StartTime_h',"StartDate_dmy"]).size().reset_index(name='count')
h_trips_hour_agg_2015 = h_trips_hour_2015.groupby(['StartDOW','StartTime_h'])['count'].mean().reset_index()
# Total weekday trips per calendar date (kept as single-column DataFrames).
df_test = pd.DataFrame(trips_df_weekdays.groupby(["StartDate_dmy", "StartDOW"]).size())
df_test_2015 = pd.DataFrame(trips_df_2015_weekdays.groupby(["StartDate_dmy", "StartDOW"]).size())
In this section, we have grouped the truck trips by different geographies, including:
Census Tracts
Urban Centers
Traffic Analysis Zones
import psrcelmerpy
# Pull the regional geography layers from PSRC's ElmerGeo geodatabase.
conn = psrcelmerpy.ElmerGeoConn()
tracts = conn.read_geolayer(layer_name='tract2010')  # 2010 census tracts
urban_centers = conn.read_geolayer(layer_name='urban_centers')
taz = conn.read_geolayer(layer_name='taz2010')  # traffic analysis zones
mic = conn.read_geolayer(layer_name='micen')  # manufacturing/industrial centers
safety_rest_area=conn.read_geolayer(layer_name='safety_rest_area')
private_truck_stop = conn.read_geolayer(layer_name='private_truck_stop')
# Destination (trip-end) points for every trip, as WGS84 GeoDataFrames.
trips_d = gpd.GeoDataFrame(
    trips_df,
    geometry=gpd.points_from_xy(trips_df.EndLocLon, trips_df.EndLocLat),
    crs="EPSG:4326",
)
trips_d_2015 = gpd.GeoDataFrame(
    trips_df_2015,
    geometry=gpd.points_from_xy(trips_df_2015.EndLocLon, trips_df_2015.EndLocLat),
    crs="EPSG:4326",
)
# Region-wide destination totals, used later for shares and deltas.
total_d = len(trips_d)
total_d_2015 = len(trips_d_2015)
# Destination points for weekday trips only.
trips_d_weekdays = gpd.GeoDataFrame(
    trips_df_weekdays,
    geometry=gpd.points_from_xy(trips_df_weekdays.EndLocLon, trips_df_weekdays.EndLocLat),
    crs="EPSG:4326",
)
trips_d_2015_weekdays = gpd.GeoDataFrame(
    trips_df_2015_weekdays,
    geometry=gpd.points_from_xy(trips_df_2015_weekdays.EndLocLon, trips_df_2015_weekdays.EndLocLat),
    crs="EPSG:4326",
)
# Origin (trip-start) points.
trips_o = gpd.GeoDataFrame(
    trips_df,
    geometry=gpd.points_from_xy(trips_df.StartLocLon, trips_df.StartLocLat),
    crs="EPSG:4326",
)
trips_o_2015 = gpd.GeoDataFrame(
    trips_df_2015,
    geometry=gpd.points_from_xy(trips_df_2015.StartLocLon, trips_df_2015.StartLocLat),
    crs="EPSG:4326",
)
# Re-project every geography layer to WGS84 so the spatial joins below align.
tracts, urban_centers, taz, mic, safety_rest_area, private_truck_stop = (
    layer.to_crs(epsg=4326)
    for layer in (tracts, urban_centers, taz, mic,
                  safety_rest_area, private_truck_stop)
)
# 4-square-mile hex grid covering the region (used for a destination heat map).
hex_map = gpd.read_file('Y:/Centers Monitoring/Systems Monitoring/2024/Data/1_3_Population/New File Geodatabase.gdb', layer='hex_grid_4mile_region')
hex_map = hex_map.to_crs(epsg=4326)
# Spatially join trip origin/destination points onto each geography.
# Fix: geopandas deprecated the `op=` keyword (FutureWarnings in the original
# run); `predicate=` is the replacement. The redundant second taz.to_crs()
# call was also dropped — taz is already re-projected above.
hex_d = gpd.sjoin(hex_map, trips_d, how='inner', predicate='intersects')
tract_trips_o = gpd.sjoin(tracts, trips_o, how='inner', predicate='intersects')
tract_trips_d = gpd.sjoin(tracts, trips_d, how='inner', predicate='intersects')
tract_trips_o_2015 = gpd.sjoin(tracts, trips_o_2015, how='inner', predicate='intersects')
tract_trips_d_2015 = gpd.sjoin(tracts, trips_d_2015, how='inner', predicate='intersects')
uc_trips_o = gpd.sjoin(urban_centers, trips_o, how='inner', predicate='intersects')
uc_trips_d = gpd.sjoin(urban_centers, trips_d, how='inner', predicate='intersects')
uc_trips_o_2015 = gpd.sjoin(urban_centers, trips_o_2015, how='inner', predicate='intersects')
uc_trips_d_2015 = gpd.sjoin(urban_centers, trips_d_2015, how='inner', predicate='intersects')
mic_trips_o = gpd.sjoin(mic, trips_o, how='inner', predicate='intersects')
mic_trips_d = gpd.sjoin(mic, trips_d, how='inner', predicate='intersects')
mic_trips_o_2015 = gpd.sjoin(mic, trips_o_2015, how='inner', predicate='intersects')
mic_trips_d_2015 = gpd.sjoin(mic, trips_d_2015, how='inner', predicate='intersects')
taz_trips_o = gpd.sjoin(taz, trips_o, how='inner', predicate='intersects')
taz_trips_d = gpd.sjoin(taz, trips_d, how='inner', predicate='intersects')
# Sanity check: counts of each geoid10 in the tracts layer.
test3 = tracts.groupby('geoid10')['geoid10'].count().reset_index(name='count').sort_values(by=['count'])
taz_trips_d_weekdays = gpd.sjoin(taz, trips_d_weekdays, how='inner', predicate='intersects')
# Collapse the joined points to origin/destination counts per tract.
# NOTE: these reassign the joined GeoDataFrames to plain count tables.
tract_trips_o=tract_trips_o.groupby('geoid10')['geoid10'].count().reset_index(name='count').sort_values(by=['count'])
tract_trips_d=tract_trips_d.groupby('geoid10')['geoid10'].count().reset_index(name='count').sort_values(by=['count'])
# Notebook inspection of the joined hex columns.
hex_d.columns
Index(['Id', 'GRID_ID', 'Shape_Length', 'Shape_Area', 'geometry',
'index_right', 'TripId', 'DeviceId', 'ProviderId', 'Mode', 'StartDate',
'StartWDay', 'EndDate', 'EndWDay', 'StartLocLat', 'StartLocLon',
'EndLocLat', 'EndLocLon', 'GeospatialType', 'ProviderType',
'ProviderDrivingProfile', 'VehicleWeightClass', 'ProbeSourceType',
'OriginZoneName', 'DestinationZoneName', 'EndpointType',
'TripMeanSpeedKph', 'TripMaxSpeedKph', 'TripDistanceMeters',
'MovementType', 'OriginCbg', 'DestCbg', 'StartTimezone', 'EndTimezone',
'WaypointFreqSec', 'StartQk', 'EndQk', 'StartDate_upd', 'StartDate_dmy',
'StartTime_h', 'EndDate_upd', 'EndDate_dmy', 'EndTime_h', 'StartDOW',
'EndDOW'],
dtype='object')
# Destination counts per hex cell; 2015 origin/destination counts per tract;
# 2019 origin/destination counts per urban center.
hex_trips_d=hex_d.groupby('GRID_ID')['GRID_ID'].count().reset_index(name='count').sort_values(by=['count'])
tract_trips_o_2015=tract_trips_o_2015.groupby('geoid10')['geoid10'].count().reset_index(name='count').sort_values(by=['count'])
tract_trips_d_2015=tract_trips_d_2015.groupby('geoid10')['geoid10'].count().reset_index(name='count').sort_values(by=['count'])
uc_trips_o=uc_trips_o.groupby('name')['name'].count().reset_index(name='count').sort_values(by=['count'])
uc_trips_d=uc_trips_d.groupby('name')['name'].count().reset_index(name='count').sort_values(by=['count'])
# Notebook column inspections.
mic.columns
mic_trips_d.columns
uc_trips_d.columns
urban_centers.columns
Index(['OBJECTID', 'id', 'name', 'area_feet', 'perimeter_', 'acres',
'hectares', 'area', 'perimeter', 'adoptdate', 'SDE_STATE_ID',
'category', 'Shape', 'geometry'],
dtype='object')
# Origin/destination counts per urban center (2015), MIC, and TAZ.
uc_trips_o_2015=uc_trips_o_2015.groupby('name')['name'].count().reset_index(name='count').sort_values(by=['count'])
uc_trips_d_2015=uc_trips_d_2015.groupby('name')['name'].count().reset_index(name='count').sort_values(by=['count'])
mic_trips_o=mic_trips_o.groupby('mic')['mic'].count().reset_index(name='count').sort_values(by=['count'])
mic_trips_d=mic_trips_d.groupby('mic')['mic'].count().reset_index(name='count').sort_values(by=['count'])
mic_trips_o_2015=mic_trips_o_2015.groupby('mic')['mic'].count().reset_index(name='count').sort_values(by=['count'])
mic_trips_d_2015=mic_trips_d_2015.groupby('mic')['mic'].count().reset_index(name='count').sort_values(by=['count'])
taz_trips_o=taz_trips_o.groupby('taz')['taz'].count().reset_index(name='count').sort_values(by=['count'])
taz_trips_d=taz_trips_d.groupby('taz')['taz'].count().reset_index(name='count').sort_values(by=['count'])
taz_trips_d_weekday=taz_trips_d_weekdays.groupby('taz')['taz'].count().reset_index(name='count').sort_values(by=['count'])
# Generalized: derive the number of weekday dates from the data instead of the
# hard-coded 23.0 (May 2019 contains 23 weekday dates, so the result is the
# same for this dataset, but this stays correct for other months).
n_weekday_dates = trips_df_weekdays['StartDate_dmy'].nunique()
taz_trips_d_weekday["trips_per_weekday"] = taz_trips_d_weekday["count"]/float(n_weekday_dates)
When comparing Freight Volumes between Urban Centers in PSRC region, Seattle Downtown stands out with about 22,000 trip destinations and origins.
Redmond-Overlake (187 %), Everett (92 %), and Tukwila (63 %) saw the biggest increase in freight trip destinations between 2015 and 2019 data.
# Compare 2015 vs 2019 freight destinations per urban center (outer merge so
# centers missing in one year keep NaN counts).
uc_trips_change = pd.merge(uc_trips_d_2015,uc_trips_d, how="outer", on=["name"])
uc_trips_change =uc_trips_change.rename(columns={"name": "UC Name", "count_y": "2019 Freight Destinations",
"count_x": "2015 Freight Destinations"})
# Relative change vs the 2015 base.
uc_trips_change['delta,%'] = (uc_trips_change['2019 Freight Destinations']-uc_trips_change['2015 Freight Destinations'])*1.0/uc_trips_change['2015 Freight Destinations']
uc_trips_change.sort_values(by='delta,%', ascending=False)
uc_trips_change_acres=pd.merge(uc_trips_change, urban_centers, left_on='UC Name', right_on="name" )
# Fix: normalize by the explicit 'acres' column, as the MIC analysis below
# does. The original divided by 'area', whose units are not documented in
# this layer, while labeling the result "per Acre".
uc_trips_change_acres['2019 Freight Destinations per Acre']=uc_trips_change_acres['2019 Freight Destinations']/uc_trips_change_acres['acres']
uc_trips_change_acres[["UC Name", "2019 Freight Destinations", '2019 Freight Destinations per Acre']]
| UC Name | 2019 Freight Destinations | 2019 Freight Destinations per Acre | |
|---|---|---|---|
| 0 | Puyallup Downtown | 795 | 3.692344111146305214088281727 |
| 1 | Auburn | 1436 | 6.143489189125876400589344387 |
| 2 | Bremerton | 980 | 1.655976856192771363761354688 |
| 3 | Federal Way | 1231 | 6.163308840777417713431228427 |
| 4 | University Place | 1489 | 3.096248965232608097127729555 |
| 5 | Kent | 1070 | 3.659901589378871976233076029 |
| 6 | Redmond-Overlake | 4401 | 8.477278644980492216883336126 |
| 7 | Lakewood | 1670 | 5.016976482121796281685622948 |
| 8 | Bothell Canyon Park | 2681 | 4.760743570539478048956476584 |
| 9 | Redmond Downtown | 2859 | 6.605999015564738097519794029 |
| 10 | Issaquah | 3041 | 6.556192192946767440425799515 |
| 11 | Greater Downtown Kirkland | 3367 | 5.974592357440193956224954162 |
| 12 | Seattle Northgate | 2242 | 5.487325864697739663674172768 |
| 13 | Puyallup South Hill | 2935 | 3.473234307652406031429039958 |
| 14 | Seattle Uptown | 3929 | 11.73659774796639551870150390 |
| 15 | Silverdale | 3160 | 3.725962047715938484471249296 |
| 16 | Burien | 2184 | 5.089121550843779442953697358 |
| 17 | Renton | 3624 | 5.983526767514238481529287302 |
| 18 | Seattle South Lake Union | 4873 | 13.55523874772783693737493054 |
| 19 | Everett | 6867 | 10.38885099373818927714495058 |
| 20 | SeaTac | 6089 | 6.882555841562988739086366087 |
| 21 | Seattle University Community | 4290 | 5.693646778622940677944580403 |
| 22 | Bellevue | 5516 | 13.44768715998037421716736892 |
| 23 | Kirkland Totem Lake | 4349 | 5.165671377876530122203370342 |
| 24 | Lynnwood | 4135 | 5.415040528184491542456576598 |
| 25 | Tacoma Mall | 3053 | 5.311559327518764848856856226 |
| 26 | Tacoma Downtown | 7313 | 5.305193667415323837516987097 |
| 27 | Tukwila | 10885 | 12.74478381301056619015188302 |
| 28 | Seattle First Hill/Capitol Hill | 7695 | 8.412490109666598553374425904 |
| 29 | Seattle Downtown | 22289 | 23.87069931551246933980923970 |
The majority of Manufacturing Industrial Centers saw a freight growth between 2015 and 2019 freight trips data.
Duwamish and Kent MIC saw the biggest volume of freight destinations in both 2015 and 2019.
# Compare 2015 vs 2019 freight destinations per MIC, plus all-MIC and
# region-wide summary rows and each row's share of regional destinations.
mic_trips_change = pd.merge(mic_trips_d_2015,mic_trips_d, how="outer", on=["mic"])
mic_trips_change =mic_trips_change.rename(columns={"mic": "MIC Name", "count_y": "2019 Freight Destinations",
"count_x": "2015 Freight Destinations"})
mic_trips_change['delta,%'] = (mic_trips_change['2019 Freight Destinations']-mic_trips_change['2015 Freight Destinations'])*1.0/mic_trips_change['2015 Freight Destinations']
mic_trips_sum_2015=mic_trips_change["2015 Freight Destinations"].sum()
mic_trips_sum_2019=mic_trips_change["2019 Freight Destinations"].sum()
mic_delta=(mic_trips_sum_2019-mic_trips_sum_2015)/mic_trips_sum_2015
# Fix: percent change is relative to the 2015 base, matching mic_delta and
# the per-MIC delta above (the original divided by the 2019 total).
total_delta=(total_d-total_d_2015)/total_d_2015
regional_trips_change=pd.DataFrame([['All Regional', total_d_2015, total_d,total_delta]], columns=['MIC Name', '2015 Freight Destinations', '2019 Freight Destinations', 'delta,%'])
all_mic_trips_change=pd.DataFrame([['All MICs', mic_trips_sum_2015, mic_trips_sum_2019,mic_delta]], columns=['MIC Name', '2015 Freight Destinations', '2019 Freight Destinations', 'delta,%'])
mic_all_trips_change=pd.concat([mic_trips_change, all_mic_trips_change, regional_trips_change])
# Shares of all regional destinations captured by each row.
mic_all_trips_change['2015 Share']= mic_all_trips_change['2015 Freight Destinations']/total_d_2015
mic_all_trips_change['2019 Share']= mic_all_trips_change['2019 Freight Destinations']/total_d
mic_all_trips_change.sort_values(by='delta,%', ascending=False)
mic_all_trips_change.to_csv('C:/GitHub/data-science/inrix/mic_trips_2015_2019.csv')
# Fix: unique is a method — without parentheses the original cell displayed
# the bound method object itself rather than the list of MIC names.
mic['mic'].unique()
<bound method Series.unique of 0 Duwamish 1 Kent MIC 2 Puget Sound Industrial Center- Bremerton 3 Ballard-Interbay 4 Paine Field / Boeing Everett 5 Port of Tacoma 6 Sumner Pacific 7 Frederickson 8 Cascade 9 North Tukwila Name: mic, dtype: object>
# Destinations per acre for each MIC. The default inner merge drops the
# 'All MICs' / 'All Regional' summary rows, which have no matching geometry.
mic_trips_change_acres=pd.merge(mic_all_trips_change, mic, left_on='MIC Name', right_on="mic" )
mic_trips_change_acres['2019 Dest per Acre']=mic_trips_change_acres['2019 Freight Destinations']/mic_trips_change_acres['acres']
mic_trips_change_acres['2015 Dest per Acre']=mic_trips_change_acres['2015 Freight Destinations']/mic_trips_change_acres['acres']
# Land-area totals for computing area shares below.
total_acres_region=tracts['land_acres'].sum()
total_acres_mics=mic_trips_change_acres['acres'].sum()
# Notebook display of the totals.
total_acres_region
total_acres_mics
Decimal('33525.29212573')
# Report table: per-acre rates plus each MIC's share of regional / MIC area.
# Fix: .copy() makes this an independent frame so the column assignments below
# don't raise SettingWithCopyWarning on a slice of mic_trips_change_acres.
mic_trips_cols=mic_trips_change_acres[["MIC Name", "2015 Dest per Acre", '2019 Dest per Acre', 'acres']].copy()
mic_trips_cols['Percent Region Area']= mic_trips_cols['acres']/total_acres_region
mic_trips_cols['Percent MIC Area']= mic_trips_cols['acres']/total_acres_mics
# Region-wide and all-MIC summary rows, appended alongside the per-MIC rows.
regional_trips_acres=pd.DataFrame([['All Regional', total_d_2015/total_acres_region, total_d/total_acres_region, total_acres_region, 1, 0]],
columns=['MIC Name', '2015 Dest per Acre', '2019 Dest per Acre', 'acres', 'Percent Region Area', 'Percent MIC Area'])
mic_trips_acres=pd.DataFrame([['All MICs', mic_trips_sum_2015/total_acres_mics, mic_trips_sum_2019/total_acres_mics,total_acres_mics, total_acres_mics/total_acres_region, 1]],
columns=['MIC Name', '2015 Dest per Acre', '2019 Dest per Acre', 'acres', 'Percent Region Area', 'Percent MIC Area'])
mic_all_trips_acres=pd.concat([mic_trips_cols, regional_trips_acres, mic_trips_acres])
mic_all_trips_acres.to_csv('C:/GitHub/data-science/inrix/mic_trips_acres.csv')
# Join the aggregated counts back onto their geometries and re-wrap as
# GeoDataFrames (pd.merge returns a plain DataFrame, dropping the geo type).
d_hex_count=pd.merge(hex_trips_d, hex_map, on=['GRID_ID'])
d_by_hex_count = gpd.GeoDataFrame(d_hex_count, crs="EPSG:4326", geometry='geometry')
d_by_tract_count = pd.merge(tract_trips_d, tracts,on=["geoid10"])
o_by_tract_count = pd.merge(tract_trips_o, tracts,on=["geoid10"])
d_by_tract_count_2015 = pd.merge(tract_trips_d_2015, tracts,on=["geoid10"])
o_by_tract_count_2015 = pd.merge(tract_trips_o_2015, tracts,on=["geoid10"])
d_by_uc_count = pd.merge(uc_trips_d, urban_centers,on=["name"])
o_by_uc_count = pd.merge(uc_trips_o, urban_centers,on=["name"])
d_by_taz_count = pd.merge(taz_trips_d, taz,on=["taz"])
o_by_taz_count = pd.merge(taz_trips_o, taz,on=["taz"])
# NOTE(review): despite the "_weekend" suffix this frame holds WEEKDAY
# destinations (it merges taz_trips_d_weekday); the name is kept so later
# cells that reference it keep working — consider renaming file-wide.
d_by_taz_count_weekend = pd.merge(taz_trips_d_weekday, taz,on=["taz"])
d_by_tract_count = gpd.GeoDataFrame(d_by_tract_count, crs="EPSG:4326", geometry='geometry')
o_by_tract_count = gpd.GeoDataFrame(o_by_tract_count, crs="EPSG:4326", geometry='geometry')
d_by_tract_count_2015 = gpd.GeoDataFrame(d_by_tract_count_2015, crs="EPSG:4326", geometry='geometry')
o_by_tract_count_2015 = gpd.GeoDataFrame(o_by_tract_count_2015, crs="EPSG:4326", geometry='geometry')
d_by_uc_count = gpd.GeoDataFrame(d_by_uc_count, crs="EPSG:4326", geometry='geometry')
o_by_uc_count = gpd.GeoDataFrame(o_by_uc_count, crs="EPSG:4326", geometry='geometry')
# Fix: the original converted the two TAZ frames twice; once is enough.
d_by_taz_count = gpd.GeoDataFrame(d_by_taz_count, crs="EPSG:4326", geometry='geometry')
o_by_taz_count = gpd.GeoDataFrame(o_by_taz_count, crs="EPSG:4326", geometry='geometry')
hex_trips_d.to_csv('C:/GitHub/data-science/inrix/truck_trips_by_hex_id.csv')
d_by_taz_count_weekday = gpd.GeoDataFrame(d_by_taz_count_weekend, crs="EPSG:4326", geometry='geometry')
The truck trips origins and destinations are concentrated in areas with industrial activity such as:
Kent Freight Valley (25,600 - 53,000 trips in May 2019)
North of Puyallup (40,300 trips in May 2019)
Port of Tacoma (19,600 - 30,500 trips in May 2019)
Port of Seattle (28,400 trips in May 2019)
Auburn (23,200 trips in May 2019)
SeaTac Airport (16,100 trips in May 2019)
2015 data shows similar areas with the high freight trips volume.
# Interactive choropleth of May 2019 truck destinations on the hex grid,
# re-projected to Web Mercator (EPSG:3857) so the basemap tiles align.
geosource = GeoJSONDataSource(geojson = d_by_hex_count.to_crs(3857).to_json())
output_file("C:/GitHub/data-science/inrix/hex_destinations.html")
output_notebook()
# Define color palettes
# NOTE(review): get_provider/Vendors and plot_height/plot_width below are
# deprecated and removed in bokeh 3.x — pin the bokeh version or migrate.
tile_provider = get_provider(Vendors.CARTODBPOSITRON)
#tile_provider = get_provider(Vendors.OSM)
palette = brewer['BuPu'][9]
palette = palette[::-1] # reverse order of colors so higher values have darker colors
# Instantiate LinearColorMapper that linearly maps numbers in a range, into a sequence of colors.
# Counts above 4000 saturate at the darkest color.
color_mapper = LinearColorMapper(palette = palette, low = 0, high = 4000)
# Define custom tick labels for color bar.
tick_labels = {'0': '0', '50': '50',
'100':'100', '500':'500', '4000':'4000'}
# Create color bar.
color_bar = ColorBar(color_mapper = color_mapper,
label_standoff = 8,
width =400, height = 20,
border_line_color = None,
location = (0,0),
orientation = 'horizontal',
major_label_overrides = tick_labels)
# Create figure object.
# NOTE(review): x_range is given as (max, min), which renders the x axis
# reversed (map mirrored east-west) — confirm whether
# (-13660000, -13580000) was intended.
p = figure(title = 'Truck Destinations, May 2019',
plot_height = 600, plot_width =600,
toolbar_location = 'below',
tools = 'pan, wheel_zoom, box_zoom, reset',
x_range=(-13580000, -13660000), y_range=(5980000, 6120000) )
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None
# Add patch renderer to figure.
states = p.patches('xs','ys', source = geosource,fill_alpha=0.6,
fill_color = {'field':'count',
'transform' : color_mapper},
line_color = 'gray',
line_width = 0.01)
# Create hover tool
p.add_tools(HoverTool(renderers = [states],
tooltips = [('GRID_ID','@GRID_ID'),
('count', '@count')]))
# Specify layout
p.add_layout(color_bar, 'below')
p.add_tile(tile_provider)
show(p)
When we grouped heavy truck trips by TAZ, it appears that 75% of TAZs receive up to 28.5 trips per month (less than 1 trip per day), while some TAZs receive up to 13,800 trips per month (about 443 trips per day). This means that heavy truck trips are highly concentrated in specific TAZs.
Heavy truck trips are concentrated on the East (along I-90) and South of the PSRC Region. The "hot spots" include:
North of Sumner (13,800 heavy truck trips)
Port of Seattle (6,500 heavy truck trips)
North Kent (4,400 heavy truck trips)
North Bend (4,300 heavy truck trips)
Failed to start the Kernel. Unable to start Kernel 'Anaconda3 (Python 3.9.12)' due to a timeout waiting for the ports to get used. View Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details.
Medium truck trips are more widely distributed across the PSRC region compared with heavy truck trips. Some of the biggest hot spots are:
SeaTac airport (15,000 medium truck trips)
Kent Freight Valley
Auburn, North of Puyallup
Port of Tacoma and Port of Seattle
Failed to start the Kernel. Unable to start Kernel 'Anaconda3 (Python 3.9.12)' due to a timeout waiting for the ports to get used. View Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details.